Packages set up

load data sets

#data(field)
#dim(field)
# head(field)
data(veggyraw)
data(veggy)

This dataset is a set of pixel count measurements to characterize green and red area per pot.

Example image of germinating plants and red flags of those unsuccessful pots

Example image of germinating plants and red flags of those unsuccessful pots

Verify replicability

## check duplicated image pots
veggyraw=mutate(veggyraw, indexrep=paste(site,qp,pos,day,sep='_'))

## Number of pots used for replicability analysis
dim(veggyraw[duplicated(veggyraw$indexrep),]) # There are 790 in total from both experiments
## [1] 790  13
## Run the test for replicability
library(MCMCglmm)
## Loading required package: Matrix
## Loading required package: coda
## Loading required package: ape
lmm=MCMCglmm(data=veggyraw[duplicated(veggyraw$indexrep),], countgreen ~1, random = ~ indexrep, family = 'poisson',verbose=F)
print ( h2MCMC(lmm,randomname = 'indexrep') )
## $Mode
##      var1 
## 0.9974265 
## 
## $HPD
##          lower     upper
## var1 0.9956802 0.9984263
## attr(,"Probability")
## [1] 0.95

Clean data by removing duplicate records from duplicated images

## Produce cleaner data than veggyraw
## Since there are duplicates but the replicability is >99%, generate average of
## counts for two pots of the same identity (can be due to two pictures per
## tray/day). Necessary for merge with master dataset later

# veggy %>% mutate( indexrep=paste(site,qp,pos,day,sep='_')) %>%
#           group_by(indexrep) %>% summarise(site=unique(site),
#                                                qp=unique(qp),
#                                                pos=unique(pos),
#                                                trayid=unique(trayid),
#                                                rep=unique(rep),
#                                                indpop=unique(indpop),
#                                                water=unique(water),
#                                                id=unique(id),
#                                                day=unique(day),
#                                                countgreen=mean(countgreen),
#                                                countred=mean(countred)
#                                            ) ->veggy
# veggy %>% head()
# veggy %>% tail()
# dim(veggy)
# 
# veggy= veggy %>% mutate( potindex=paste(site,qp,pos,id,sep="_")) %>% select(-indexrep)
# 
# devtools::use_data(veggy,overwrite = T) # Save the data for the package

data(veggy)

This is not run because takes some time, instead I load the data already produced

Detecting unsuccessful pots

# just get the total number of red points
red=veggy %>% group_by(site,qp,pos,trayid,rep,indpop,water,id) %>% summarize(redsum=sum(countred)) %>% mutate(identifier=paste(sep="_",site,qp,pos)) ### Probably better continuous, because a posteriory one can 
fvals<-sapply(seq(1e4,1e6,by=1000),function(x){
  # cat(x)
  # cat("->")
  # cat(
    summary(aov(red$redsum ~ red$redsum >x))[[1]][["F value"]][1]
    # )
  # cat("\n")

})

plot(fvals ~ seq(1e4,1e6,by=1000), pch=19, ylab='F statistic from ANOVA', xlab='red pixel sum threshold',cex=0.2)

seq(1e4,1e6,by=1000)[which(fvals==max(fvals,na.rm=T))] # =152000
## [1] 152000
table(red$redsum > 152000)
## 
## FALSE  TRUE 
## 22780  1967
badflags = red %>% filter(redsum > 152000) %>% select(identifier)
## Adding missing grouping variables: `site`, `qp`, `pos`, `trayid`, `rep`, `indpop`, `water`
badflags=unique(badflags$identifier)
length(badflags)
## [1] 1967

By calculating F statistic, we calculate the variance between two groups of pots whose pixels are counted. For that several thresholds are tried, the one that separates better the two distribution is the one that will be used

Modeling trajectories of germination and growth

# veg<-
#   veggy %>%
# 
#   mutate(identifier=paste(sep="_",site,qp,pos)) %>% # get an indentifier variable
#   filter( ! identifier  %in% badflags) %>% # remove those pots with bad identifiers
#   # filter( ! identifier  %in% badflagsgreen) %>% # filter if there is not enough green
# 
# 
#   mutate(starting=startday(site)) %>% # add the start day of the experiment ina per row basis for later calculations
#   mutate(daycount= fn(day - as.Date(starting) ) )%>% #  trick to filter the 40 first dates depending on experiment
#   filter(daycount <60) %>% # 40 days because we started the thinning like 1 month after they started germination, probably would be better to do it in a per pot basis.
# 
#   group_by(site,qp,pos,trayid,rep,indpop,water,id) %>% # group observations by pot to analyse each time series
#   summarize(
#             ger.a=fitsigmoid(countgreen,daycount,parameter='a'),
#             ger.b=fitsigmoid(countgreen,daycount,parameter='b'),
#             ger.c=fitsigmoid(countgreen,daycount,parameter='c')
#             )
#             #,
#             # germi=fitgermination(countgreen,daycount),  # Probably the regression one is not so nice.
#             # germir2=r2fitgermination(countgreen,daycount),
#             # germip=pfitgermination(countgreen,daycount),
#             # firstgreen=firstgreen(y=countgreen,x=daycount)  # first green pixels is kind of arbitrary
#             # )
# 
data(veg)

Plotting the sigmoidal curves of 1000 pots

vegh<-head(veg,1000) %>% filter(ger.a != "NA")

plot( ylab='Predicted # pixels',xlab='Days of experiment',
  ylim=c(0,50000),
  y=
getsigmoid(
  a= substrRight(vegh$ger.a,lastpos = 4,giveright = F)[2],
  b= substrRight(vegh$ger.b,lastpos = 4,giveright = F)[2],
  c= substrRight(vegh$ger.c,lastpos = 4,giveright = F)[2]
),
x=1:40,
type='l'
)
for( i in 1:nrow(vegh)){
lines(  y=
getsigmoid(
  a= substrRight(vegh$ger.a,lastpos = 4,giveright = F)[i],
  b= substrRight(vegh$ger.b,lastpos = 4,giveright = F)[i],
  c= substrRight(vegh$ger.c,lastpos = 4,giveright = F)[i]
),
x=1:40,
col=transparent('black')
)
}